#!/usr/bin/env Rscript

## Input data: one row per spouse dyad.
##
## The workspace is intentionally NOT cleared with rm(list = ls()) here:
## doing so deleted `spouses` immediately before it was used, so the script
## stopped with "object 'spouses' not found" whenever it was run as a whole.
##
## If a `spouses` object is already available (e.g. loaded earlier in an
## interactive session) it is reused; otherwise it is read from the
## gzip-compressed CSV, which read.csv() decompresses transparently.
if (!exists("spouses")) {
    spouses <- read.csv(
        "formattedByDyad_CaliAllYears_spouses_shorterFormatted.csv.gz"
    )
}

## Working copy that all derived columns below are added to.
DATA <- spouses

## Party indicators for the two members of each dyad (suffixes `_e` / `_a`
## follow the two party columns). Each flag is TRUE when the corresponding
## party column equals the label, FALSE when it differs, and NA when the
## party value itself is NA.
DATA[["dem_e"]] <- DATA[["party_cali310_e"]] == "dem"
DATA[["dem_a"]] <- DATA[["party_cali310_a"]] == "dem"

DATA[["rep_e"]] <- DATA[["party_cali310_e"]] == "rep"
DATA[["rep_a"]] <- DATA[["party_cali310_a"]] == "rep"

## Constant 1 on every row, so that summing it within a group gives the
## number of rows in that group.
DATA[["count"]] <- rep(1, times = nrow(DATA))

## same_party: 1 when both members of the dyad are "dem" or both are "rep",
## 0 otherwise; NA when missing flags leave the answer undetermined.
## `&` binds tighter than `|`, so the two same-party cases are OR-ed together.
DATA$same_party <- with(
    DATA,
    as.integer((dem_e & dem_a) | (rep_e & rep_a))
)

## SAME_both: per-group totals of same_party and count, restricted to dyads
## where exactly two of the four party flags are TRUE. Because the dem/rep
## flags of one person are mutually exclusive, that means both members have
## party "dem" or "rep".
##
## The grouping value is 2010 minus the number formed by the first four
## characters of dateofbirth_cali310_e (presumably the birth year, i.e. age
## in 2010 -- confirm against the data format).
## which() drops rows whose condition is NA, as subset() does.
SAME_both <- aggregate(
    cbind(same_party, count) ~
        I(2010 - as.numeric(substr(dateofbirth_cali310_e, 1, 4))),
    data = DATA[
        which(with(DATA, dem_e + dem_a + rep_e + rep_a == 2)),
        ,
        drop = FALSE
    ],
    FUN = sum
)

## SAME_either: per-group means of same_party and count, restricted to dyads
## where at least one of the four party flags is TRUE and neither party
## column is the empty string.
##
## Grouping is the same as for SAME_both: 2010 minus the number formed by the
## first four characters of dateofbirth_cali310_e (presumably the birth year
## -- confirm against the data format).
##
## NOTE(review): with FUN = mean the same_party column is a proportion, and
## the count column is always 1 (count is 1 on every row). SAME_both uses
## sum instead -- confirm that mean is intended here.
## which() drops rows whose condition is NA, as subset() does.
SAME_either <- aggregate(
    cbind(same_party, count) ~
        I(2010 - as.numeric(substr(dateofbirth_cali310_e, 1, 4))),
    data = DATA[
        which(with(
            DATA,
            dem_e + dem_a + rep_e + rep_a >= 1 &
                party_cali310_e != "" &
                party_cali310_a != ""
        )),
        ,
        drop = FALSE
    ],
    FUN = mean
)
